import ast
import base64
import io
import json
from flask import Blueprint,request
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.neural_network import MLPRegressor
import tensorflow as tf
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder
import pandas as pd
from math import log
from tools import treePlotter
import operator
from collections import Counter


# Pruning switches for the decision-tree builders below:
#   pre_pruning  - before expanding a split, keep a majority leaf if the
#                  split does not improve test-set accuracy
#   post_pruning - after building a subtree, replace it with a majority
#                  leaf if the leaf is at least as accurate
pre_pruning = True
post_pruning = True


# Flask blueprint exposing the big-data / ML prediction endpoints.
bigData_api=Blueprint('bigData_api',__name__)

# 决策树通用函数
# -----------------------------------------------------------------------------------------
# 划分数据集
def splitdataset(dataset, axis, value):
    """Return the rows of `dataset` whose feature at `axis` equals `value`,
    with that feature column removed from each returned row."""
    matched = []
    for row in dataset:
        if row[axis] != value:
            continue
        matched.append(row[:axis] + row[axis + 1:])
    return matched

# 计算信息熵
def cal_entropy(dataset):
    """Compute the Shannon entropy (base 2) of the class labels.

    The class label is taken from the last element of each row.
    """
    total = len(dataset)
    label_counts = Counter(row[-1] for row in dataset)
    # 0.0 - sum(...) keeps the sign/float semantics of sequential subtraction.
    return 0.0 - sum((cnt / total) * log(cnt / total, 2)
                     for cnt in label_counts.values())

def majorityCnt(classList):
    """Majority vote: return the class label occurring most often.

    Ties are broken in favour of the label encountered first, matching a
    stable descending sort on the counts.
    """
    tally = {}
    for label in classList:
        tally[label] = tally.get(label, 0) + 1
    # max() returns the first maximum in insertion order.
    return max(tally.items(), key=operator.itemgetter(1))[0]


# ID3算法
def ID3_chooseBestFeatureToSplit(dataset):
    """Pick the feature index with the largest information gain (ID3).

    Returns -1 when no feature yields a strictly positive gain.
    """
    baseEnt = cal_entropy(dataset)
    bestFeature, bestInfoGain = -1, 0.0
    for i in range(len(dataset[0]) - 1):
        # Distinct values of feature i define the candidate partition.
        values = {example[i] for example in dataset}
        newEnt = 0.0
        for value in values:
            sub = splitdataset(dataset, i, value)
            newEnt += (len(sub) / float(len(dataset))) * cal_entropy(sub)
        infoGain = baseEnt - newEnt
        print(u"ID3中第%d个特征的信息增益为：%.3f" % (i, infoGain))
        if infoGain > bestInfoGain:
            bestInfoGain, bestFeature = infoGain, i
    return bestFeature


# C4.5算法
def C45_chooseBestFeatureToSplit(dataset):
    """Pick the feature index with the largest information gain ratio (C4.5).

    Features whose intrinsic value IV is zero (single-valued features) are
    skipped, since the ratio would divide by zero. Returns -1 when no
    feature yields a positive gain ratio.
    """
    baseEnt = cal_entropy(dataset)
    bestFeature, bestRatio = -1, 0.0
    for i in range(len(dataset[0]) - 1):
        values = {example[i] for example in dataset}
        newEnt, IV = 0.0, 0.0
        for value in values:
            sub = splitdataset(dataset, i, value)
            p = len(sub) / float(len(dataset))
            newEnt += p * cal_entropy(sub)
            IV -= p * log(p, 2)
        if IV == 0:
            # Single-valued feature: gain ratio undefined, skip it.
            continue
        infoGain_ratio = (baseEnt - newEnt) / IV
        print(u"C4.5中第%d个特征的信息增益率为：%.3f" % (i, infoGain_ratio))
        if infoGain_ratio > bestRatio:
            bestRatio, bestFeature = infoGain_ratio, i
    return bestFeature


# CART算法
def CART_chooseBestFeatureToSplit(dataset):
    """Pick the feature index with the smallest weighted Gini index (CART).

    Assumes a binary class label whose "negative" value is the string '0'
    (`subp` is the fraction of rows labelled '0' in each subset).

    Bug fixed: the Gini contribution was previously computed OUTSIDE the
    inner loop over feature values, so only the last subset's `p`/`subp`
    were counted; it is now accumulated per value, weighted by subset size.
    """
    numFeatures = len(dataset[0]) - 1
    bestGini = float("inf")
    bestFeature = -1
    for i in range(numFeatures):
        uniqueVals = set(example[i] for example in dataset)
        gini = 0.0
        for value in uniqueVals:
            subdataset = splitdataset(dataset, i, value)
            p = len(subdataset) / float(len(dataset))
            # Fraction of this subset carrying the class label '0'.
            subp = len(splitdataset(subdataset, -1, '0')) / float(len(subdataset))
            # Weighted binary Gini impurity of this subset.
            gini += p * (1.0 - pow(subp, 2) - pow(1 - subp, 2))
        print(u"CART中第%d个特征的基尼值为：%.3f" % (i, gini))
        if gini < bestGini:
            bestGini = gini
            bestFeature = i
    return bestFeature

def majorityCnt(classList):
    """Majority vote: return the most frequent class label.

    `Counter.most_common` is stable (equivalent to a stable descending
    sort), so ties resolve to the label encountered first — the same
    behaviour as sorting the counts in reverse.
    """
    return Counter(classList).most_common(1)[0][0]
def classifytest(inputTree, featLabels, testDataSet):
    """Run the decision tree on every test vector.

    inputTree:   nested-dict decision tree
    featLabels:  feature names aligned with the test vector columns
    testDataSet: iterable of test vectors

    Returns the list of predicted labels, one per test vector.
    """
    return [classify(inputTree, featLabels, testVec) for testVec in testDataSet]


def cal_acc(test_output, label):
    """Return the fraction of predictions in `test_output` matching `label`.

    Both sequences must have the same length; raises ZeroDivisionError on
    empty input (unchanged from the original behaviour).
    """
    assert len(test_output) == len(label)
    hits = sum(1 for pred, truth in zip(test_output, label) if pred == truth)
    return float(hits / len(test_output))

def classify(inputTree, featLabels, testVec):
    """Descend the decision tree and return the predicted label for testVec.

    Falls back to '0' when no branch matches the vector's feature value.
    """
    rootLabel = next(iter(inputTree))
    branches = inputTree[rootLabel]
    featIndex = featLabels.index(rootLabel)
    prediction = '0'
    for branchValue, subtree in branches.items():
        if testVec[featIndex] == branchValue:
            if type(subtree) is dict:
                # Internal node: recurse into the matching branch.
                prediction = classify(subtree, featLabels, testVec)
            else:
                # Leaf node: the stored value is the class label.
                prediction = subtree
    return prediction


# 利用ID3算法创建决策树
def ID3_createTree(dataset, labels, test_dataset):
    """Recursively build an ID3 decision tree from `dataset`.

    dataset:      rows of string feature values with the class label last
    labels:       feature names aligned with the columns (mutated: the
                  chosen feature's name is deleted at each level)
    test_dataset: held-out rows driving the pre-/post-pruning checks

    Returns a nested dict {featureName: {value: subtree_or_label}} for an
    internal node, or a bare class label string for a leaf.
    """
    classList = [example[-1] for example in dataset]
    if classList.count(classList[0]) == len(classList):
        # All rows share one class: stop splitting.
        return classList[0]
    if len(dataset[0]) == 1:
        # All features consumed: fall back to majority vote.
        return majorityCnt(classList)
    bestFeat = ID3_chooseBestFeatureToSplit(dataset)
    bestFeatLabel = labels[bestFeat]
    print(u"此时最优索引为：" + (bestFeatLabel))

    ID3Tree = {bestFeatLabel: {}}
    del (labels[bestFeat])
    # Every value the chosen feature takes in the data = one branch each.
    featValues = [example[bestFeat] for example in dataset]
    uniqueVals = set(featValues)

    if pre_pruning:
        # Pre-pruning: compare the test accuracy of a single majority leaf
        # against predicting the per-branch majority after splitting; keep
        # the leaf when splitting does not improve accuracy.
        ans = []
        for index in range(len(test_dataset)):
            ans.append(test_dataset[index][-1])
        result_counter = Counter()
        for vec in dataset:
            result_counter[vec[-1]] += 1
        leaf_output = result_counter.most_common(1)[0][0]
        root_acc = cal_acc(test_output=[leaf_output] * len(test_dataset), label=ans)
        outputs = []
        ans = []
        for value in uniqueVals:
            cut_testset = splitdataset(test_dataset, bestFeat, value)
            cut_dataset = splitdataset(dataset, bestFeat, value)
            for vec in cut_testset:
                ans.append(vec[-1])
            result_counter = Counter()
            for vec in cut_dataset:
                result_counter[vec[-1]] += 1
            leaf_output = result_counter.most_common(1)[0][0]
            outputs += [leaf_output] * len(cut_testset)
        cut_acc = cal_acc(test_output=outputs, label=ans)

        if cut_acc <= root_acc:
            # NOTE(review): leaf_output here is the majority label of the
            # LAST branch from the loop above, not of the whole dataset —
            # looks unintended; confirm before relying on it.
            return leaf_output

    for value in uniqueVals:
        subLabels = labels[:]
        ID3Tree[bestFeatLabel][value] = ID3_createTree(
            splitdataset(dataset, bestFeat, value),
            subLabels,
            splitdataset(test_dataset, bestFeat, value))

    if post_pruning:
        # Post-pruning: replace the finished subtree with a majority leaf
        # when the leaf is at least as accurate on the test set.
        # NOTE(review): feature names are hard-coded to the loan dataset.
        tree_output = classifytest(ID3Tree,
                                   featLabels=['年龄段', '有工作', '有自己的房子', '信贷情况'],
                                   testDataSet=test_dataset)
        ans = []
        for vec in test_dataset:
            ans.append(vec[-1])
        root_acc = cal_acc(tree_output, ans)
        result_counter = Counter()
        for vec in dataset:
            result_counter[vec[-1]] += 1
        leaf_output = result_counter.most_common(1)[0][0]
        cut_acc = cal_acc([leaf_output] * len(test_dataset), ans)

        if cut_acc >= root_acc:
            return leaf_output

    return ID3Tree

def C45_createTree(dataset, labels, test_dataset):
    """Recursively build a C4.5 decision tree from `dataset`.

    dataset:      rows of string feature values with the class label last
    labels:       feature names aligned with the columns (mutated: the
                  chosen feature's name is deleted at each level)
    test_dataset: held-out rows driving the pre-/post-pruning checks

    Returns a nested dict {featureName: {value: subtree_or_label}} for an
    internal node, or a bare class label string for a leaf.
    """
    classList = [example[-1] for example in dataset]
    if classList.count(classList[0]) == len(classList):
        # All rows share one class: stop splitting.
        return classList[0]
    if len(dataset[0]) == 1:
        # All features consumed: fall back to majority vote.
        return majorityCnt(classList)
    bestFeat = C45_chooseBestFeatureToSplit(dataset)
    bestFeatLabel = labels[bestFeat]
    print(u"此时最优索引为：" + (bestFeatLabel))
    C45Tree = {bestFeatLabel: {}}
    del (labels[bestFeat])
    # Every value the chosen feature takes in the data = one branch each.
    featValues = [example[bestFeat] for example in dataset]
    uniqueVals = set(featValues)

    if pre_pruning:
        # Pre-pruning: compare the test accuracy of a single majority leaf
        # against predicting the per-branch majority after splitting; keep
        # the leaf when splitting does not improve accuracy.
        ans = []
        for index in range(len(test_dataset)):
            ans.append(test_dataset[index][-1])
        result_counter = Counter()
        for vec in dataset:
            result_counter[vec[-1]] += 1
        leaf_output = result_counter.most_common(1)[0][0]
        root_acc = cal_acc(test_output=[leaf_output] * len(test_dataset), label=ans)
        outputs = []
        ans = []
        for value in uniqueVals:
            cut_testset = splitdataset(test_dataset, bestFeat, value)
            cut_dataset = splitdataset(dataset, bestFeat, value)
            for vec in cut_testset:
                ans.append(vec[-1])
            result_counter = Counter()
            for vec in cut_dataset:
                result_counter[vec[-1]] += 1
            leaf_output = result_counter.most_common(1)[0][0]
            outputs += [leaf_output] * len(cut_testset)
        cut_acc = cal_acc(test_output=outputs, label=ans)

        if cut_acc <= root_acc:
            # NOTE(review): leaf_output here is the majority label of the
            # LAST branch from the loop above, not of the whole dataset —
            # looks unintended; confirm before relying on it.
            return leaf_output

    for value in uniqueVals:
        subLabels = labels[:]
        C45Tree[bestFeatLabel][value] = C45_createTree(
            splitdataset(dataset, bestFeat, value),
            subLabels,
            splitdataset(test_dataset, bestFeat, value))

    if post_pruning:
        # Post-pruning: replace the finished subtree with a majority leaf
        # when the leaf is at least as accurate on the test set.
        # NOTE(review): feature names are hard-coded to the loan dataset.
        tree_output = classifytest(C45Tree,
                                   featLabels=['年龄段', '有工作', '有自己的房子', '信贷情况'],
                                   testDataSet=test_dataset)
        ans = []
        for vec in test_dataset:
            ans.append(vec[-1])
        root_acc = cal_acc(tree_output, ans)
        result_counter = Counter()
        for vec in dataset:
            result_counter[vec[-1]] += 1
        leaf_output = result_counter.most_common(1)[0][0]
        cut_acc = cal_acc([leaf_output] * len(test_dataset), ans)

        if cut_acc >= root_acc:
            return leaf_output

    return C45Tree

def CART_createTree(dataset, labels, test_dataset):
    """Recursively build a CART decision tree from `dataset`.

    dataset:      rows of string feature values with the class label last
    labels:       feature names aligned with the columns (mutated: the
                  chosen feature's name is deleted at each level)
    test_dataset: held-out rows driving the pre-/post-pruning checks

    Returns a nested dict {featureName: {value: subtree_or_label}} for an
    internal node, or a bare class label string for a leaf.

    Bug fixed: the post-pruning check used to sit INSIDE the loop that
    builds the branches (unlike ID3_createTree / C45_createTree), so it
    evaluated a partially built tree and could return before all branches
    existed; it now runs once, after the tree is complete.
    """
    classList = [example[-1] for example in dataset]
    if classList.count(classList[0]) == len(classList):
        # All rows share one class: stop splitting.
        return classList[0]
    if len(dataset[0]) == 1:
        # All features consumed: fall back to majority vote.
        return majorityCnt(classList)
    bestFeat = CART_chooseBestFeatureToSplit(dataset)
    bestFeatLabel = labels[bestFeat]
    print(u"此时最优索引为：" + (bestFeatLabel))
    CARTTree = {bestFeatLabel: {}}
    del (labels[bestFeat])
    # Every value the chosen feature takes in the data = one branch each.
    uniqueVals = set(example[bestFeat] for example in dataset)

    if pre_pruning:
        # Pre-pruning: keep a majority leaf when splitting on bestFeat does
        # not improve accuracy on the test set.
        ans = [vec[-1] for vec in test_dataset]
        leaf_output = Counter(vec[-1] for vec in dataset).most_common(1)[0][0]
        root_acc = cal_acc(test_output=[leaf_output] * len(test_dataset), label=ans)
        outputs = []
        ans = []
        for value in uniqueVals:
            cut_testset = splitdataset(test_dataset, bestFeat, value)
            cut_dataset = splitdataset(dataset, bestFeat, value)
            ans.extend(vec[-1] for vec in cut_testset)
            leaf_output = Counter(vec[-1] for vec in cut_dataset).most_common(1)[0][0]
            outputs += [leaf_output] * len(cut_testset)
        cut_acc = cal_acc(test_output=outputs, label=ans)
        if cut_acc <= root_acc:
            return leaf_output

    for value in uniqueVals:
        subLabels = labels[:]
        CARTTree[bestFeatLabel][value] = CART_createTree(
            splitdataset(dataset, bestFeat, value),
            subLabels,
            splitdataset(test_dataset, bestFeat, value))

    if post_pruning:
        # Post-pruning: replace the finished subtree with a majority leaf
        # when the leaf is at least as accurate on the test set.
        # NOTE(review): feature names are hard-coded to the loan dataset.
        tree_output = classifytest(CARTTree,
                                   featLabels=['年龄段', '有工作', '有自己的房子', '信贷情况'],
                                   testDataSet=test_dataset)
        ans = [vec[-1] for vec in test_dataset]
        root_acc = cal_acc(tree_output, ans)
        leaf_output = Counter(vec[-1] for vec in dataset).most_common(1)[0][0]
        cut_acc = cal_acc([leaf_output] * len(test_dataset), ans)
        if cut_acc >= root_acc:
            return leaf_output

    return CARTTree


def read_dataset(filename):
    """Load the loan training data from a comma-separated text file.

    Column encoding (all values are strings):
      年龄段: 0 youth, 1 middle-aged, 2 elderly
      有工作: 0 no, 1 yes
      有自己的房子: 0 no, 1 yes
      信贷情况: 0 fair, 1 good, 2 excellent
      class (loan granted): 0 no, 1 yes

    Returns (dataset, labels): dataset is a list of rows (class label
    last), labels are the four feature names.

    Fixes: the file handle is now closed deterministically via `with`
    (the original leaked it); the unused `labelCounts` dict and dead
    commented-out code were removed.
    """
    labels = ['年龄段', '有工作', '有自己的房子', '信贷情况']
    dataset = []
    with open(filename, 'r') as fr:
        for line in fr:
            dataset.append(line.strip().split(','))  # comma-separated row
    return dataset, labels
def read_testset(testfile):
    """Load the loan test data from a comma-separated text file.

    Column encoding (all values are strings):
      年龄段: 0 youth, 1 middle-aged, 2 elderly
      有工作: 0 no, 1 yes
      有自己的房子: 0 no, 1 yes
      信贷情况: 0 fair, 1 good, 2 excellent
      class (loan granted): 0 no, 1 yes

    Returns the list of rows, each a list of strings (class label last).

    Fix: the file handle is now closed deterministically via `with`
    (the original leaked it).
    """
    with open(testfile, 'r') as fr:
        return [line.strip().split(',') for line in fr]


# filename = r'D:\data\progm\py\dataAnalysis\app\tools\dataset.txt'
# testfile = r'D:\data\progm\py\dataAnalysis\app\tools\testset.txt'
# dataset, labels = read_dataset(filename)
#------------------------------------------------------------------------------------------

# ------------------------------------------------------------------------------------------

# 贝叶斯分析（朴素）-高斯
@bigData_api.route("/predict/bayes/gaussian",methods=["POST"])
def bayesAnalysisGauss():
    """Gaussian naive Bayes classifier exposed as a POST endpoint.

    Form fields (each a JSON-encoded array):
      X      - training samples, one row per sample, one column per feature
      y      - training labels, one per sample
      x_test - a single sample to classify

    Returns a dict with the class priors, the per-class per-feature
    (mean, std) Gaussian parameters, and the predicted label.
    """
    X = np.array(json.loads(request.form.get("X")))
    y = np.array(json.loads(request.form.get("y")))
    x_test = np.array(json.loads(request.form.get("x_test")))

    classes = np.unique(y)
    n_features = X.shape[1]

    # Class priors: relative frequency of each label in y.
    prior = {c: np.sum(y == c) / len(y) for c in classes}

    # Per-class Gaussian parameters (mean, std) for every feature.
    posterior = {}
    for c in classes:
        X_c = X[y == c]
        posterior[c] = {i: (np.mean(X_c[:, i]), np.std(X_c[:, i]))
                        for i in range(n_features)}

    # Unnormalised posterior of x_test under each class.
    pred_prob = {}
    for c in classes:
        likelihood = prior[c]
        for i in range(n_features):
            mean, std = posterior[c][i]
            likelihood *= norm.pdf(x_test[i], mean, std)
        pred_prob[c] = likelihood

    # Predict the class with the largest unnormalised posterior.
    pred_label = max(pred_prob, key=pred_prob.get)
    return {"prior":prior, "posterior":posterior, "pred_label":pred_label}
'''[[1.177138172473398, -0.565274337896019], [-0.431217529018185, 0.49207111754945515], [1.3377705586815947, -0.6422998672474199], [-0.8630787085337212, 1.123904524456276], [-0.4837080922258424, 0.13332763462552952], [0.33066363279972094, 0.35751528826598417], [-1.274835472518806, 1.107618689302429], [-0.2646564369919126, 0.9708991543590427], [0.4767357741634244, 0.9593450980596305], [-1.1223766493021403, -0.14419242581506334], [0.1789184443847115, 1.3565052762748415], [-0.248970466437203, 1.0791875471427697], [-0.8585242692731215, -0.8458697977137738], [0.7216682225651283, 0.253395110730819], [2.994121467079173, 1.5284165226606918], [-0.20094418919038287, -1.2252988684981319], [-0.28865852497588346, -1.2771253357191747], [1.1292263812195422, 0.3347747862357614], [-0.2276737823111657, -0.053254502310863004], [-0.8921810049872001, -1.2724039279888986], [0.3104191005659119, -0.6912233550971733], [1.3641617180682624, -2.2958970943253374], [0.0506758667660245, 0.9301887371868292], [-0.7320154534446447, 0.10377777101519735], [-0.7610619138561863, 1.046495546291496], [0.950302521865528, -0.16379434533227424], [1.1304004647802761, 0.6239001530557766], [-0.6458522848361581, -0.20142170419356797], [1.2057111113724466, -0.3654456424726306], [-0.599264986085628, -0.28910846786678746], [-0.40071640317663254, 1.4940477505092689], [-0.5623124192990564, 0.15285518354040448], [0.24842977771203142, 1.972450130367633], [-0.3157209218254359, 0.09611879180974388], [0.5844169276755397, -0.43888024092900274], [-0.6826448992780586, -2.030290636987287], [-0.17657233125449454, 0.9949435561685119], [-0.6962728689775617, 0.10575042127555995], [1.581316907427774, 0.06491101539020662], [0.17613383584409495, -0.3072497296247995], [0.33981589833682607, -0.1532562844180953], [0.40234255546462716, 0.2789588680019494], [1.1300217505038088, -1.8689848108475373], [1.1885060974345545, -0.7380433625667706], [0.13759921015661095, -0.039941924172302454], [-0.5613221863450446, 0.21904574744272934], 
[-0.9047448110777034, 0.5962052142941056], [-0.660305610590787, -1.2603275689349236], [-0.48282455783093475, 1.3381025128911195], [1.2589901005480135, 0.16445059639417037], [1.1016925704550264, 0.450322746511732], [-2.0945821008367997, -0.9685950958533315], [0.897140852296603, -0.8221159153804152], [-0.27321588022344545, 0.12885349038670355], [1.2577636258746372, -0.3642794018613568], [0.05638064869036868, -0.1246102811627717], [1.1843620615836414, -1.5494187070139296], [-1.356718234468078, 2.2754610620206766], [-0.2917992413073571, 1.2622162265770793], [1.1785296782906352, -0.30663383411674083], [0.9283326850837434, 2.3130065680088134], [-1.0882107048064593, -1.010342411981519], [-0.6715946485167135, 0.8031765164309552], [2.3972049581252572, -0.038423697772464536], [-0.9034187647217478, -1.0045655142534609], [-1.871912369391957, 0.3003765808745209], [2.29705970483979, 1.0574278618032045], [0.02642165608389564, 0.8230750933223269], [-0.8475063459442226, 0.8805911656065049], [1.3441957002793752, -1.8507619192775213], [-0.06314204106101212, -0.5642040955544981], [1.665281833601846, -0.9680319802246914], [-0.7391735092080167, 1.5100680424220025], [0.08356107591591584, -0.04848367020368557], [0.8666160666282376, 0.4311625565426353], [-1.538567278206092, 0.9378135775410609], [0.4095382493975098, 0.3431675009738997], [-0.050431118863949055, 0.8628520416740327], [-0.2510605902077834, 1.6774164964986138], [-0.4331362969234235, 1.8272992296429715], [2.1180545057613647, -0.5605860224924134], [0.1079104515122362, -0.7697690875526993], [-0.8750681755391453, 0.7824951909000751], [-0.15812913283575247, 0.40785001219304545], [0.8862946777054688, -0.6661148735360755], [-0.7339578532439517, -1.3675264190202312], [-0.47062523549800667, 0.3094453967540856], [-0.955726907996163, -0.32730031708821217], [1.4926550729281864, 0.8072154513475598], [-0.7372092950699108, -1.6613942078027186], [1.0188761047747272, -0.6844924752229372], [-0.05791491925724839, -1.017204445959148], 
[0.4081121215852674, -0.6785753459510311], [-2.4748861670570217, -0.05414664600052668], [0.2301235309450819, -1.2081311934083587], [0.6883473058430332, -0.9261280758796094], [-0.14960251009603295, -1.6742203641582563], [-0.16965935032080587, 1.3700029197509533], [0.37404437057510803, 2.905441664235825], [-1.1671044922309914, 0.24923923691203828]]'''
'''[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'''
'''[0, 0]'''

# 贝叶斯分析（朴素）-多项式
@bigData_api.route("/predict/bayes/multinomia",methods=["POST"])
def bayesAnalysisPolynomial():
    """Multinomial naive Bayes classifier exposed as a POST endpoint.

    Form fields (each a JSON-encoded array):
      X      - training samples (feature counts), one row per sample
      y      - training labels, one per sample
      x_test - a single sample to classify

    Uses Laplace smoothing with alpha = 1 so no probability is exactly
    zero. Scoring is done in log space for numerical stability.

    Returns a dict with the class priors, the smoothed per-class
    per-feature probabilities, and the predicted label.
    """
    X = np.array(json.loads(request.form.get("X")))
    y = np.array(json.loads(request.form.get("y")))
    x_test = np.array(json.loads(request.form.get("x_test")))

    alpha = 1  # Laplace smoothing constant
    classes = np.unique(y)
    n_features = X.shape[1]

    # Class priors: relative frequency of each label in y.
    prior = {c: np.sum(y == c) / len(y) for c in classes}

    # Smoothed per-class per-feature probability estimates.
    posterior = {}
    for c in classes:
        X_c = X[y == c]
        posterior[c] = {}
        for i in range(n_features):
            posterior[c][i] = (np.sum(X_c[:, i]) + alpha) / (X_c.shape[0] + alpha * 2)

    # Log-space score of x_test under each class.
    pred_prob = {}
    for c in classes:
        log_score = np.log(prior[c])
        for i in range(n_features):
            if x_test[i] > 0:
                log_score += np.log(posterior[c][i]) * x_test[i]
        pred_prob[c] = log_score

    # Predict the class with the largest log score.
    pred_label = max(pred_prob, key=pred_prob.get)
    return {"prior":prior, "posterior":posterior, "pred_label":pred_label}
'''[[1.177138172473398, -0.565274337896019], [-0.431217529018185, 0.49207111754945515], [1.3377705586815947, -0.6422998672474199], [-0.8630787085337212, 1.123904524456276], [-0.4837080922258424, 0.13332763462552952], [0.33066363279972094, 0.35751528826598417], [-1.274835472518806, 1.107618689302429], [-0.2646564369919126, 0.9708991543590427], [0.4767357741634244, 0.9593450980596305], [-1.1223766493021403, -0.14419242581506334], [0.1789184443847115, 1.3565052762748415], [-0.248970466437203, 1.0791875471427697], [-0.8585242692731215, -0.8458697977137738], [0.7216682225651283, 0.253395110730819], [2.994121467079173, 1.5284165226606918], [-0.20094418919038287, -1.2252988684981319], [-0.28865852497588346, -1.2771253357191747], [1.1292263812195422, 0.3347747862357614], [-0.2276737823111657, -0.053254502310863004], [-0.8921810049872001, -1.2724039279888986], [0.3104191005659119, -0.6912233550971733], [1.3641617180682624, -2.2958970943253374], [0.0506758667660245, 0.9301887371868292], [-0.7320154534446447, 0.10377777101519735], [-0.7610619138561863, 1.046495546291496], [0.950302521865528, -0.16379434533227424], [1.1304004647802761, 0.6239001530557766], [-0.6458522848361581, -0.20142170419356797], [1.2057111113724466, -0.3654456424726306], [-0.599264986085628, -0.28910846786678746], [-0.40071640317663254, 1.4940477505092689], [-0.5623124192990564, 0.15285518354040448], [0.24842977771203142, 1.972450130367633], [-0.3157209218254359, 0.09611879180974388], [0.5844169276755397, -0.43888024092900274], [-0.6826448992780586, -2.030290636987287], [-0.17657233125449454, 0.9949435561685119], [-0.6962728689775617, 0.10575042127555995], [1.581316907427774, 0.06491101539020662], [0.17613383584409495, -0.3072497296247995], [0.33981589833682607, -0.1532562844180953], [0.40234255546462716, 0.2789588680019494], [1.1300217505038088, -1.8689848108475373], [1.1885060974345545, -0.7380433625667706], [0.13759921015661095, -0.039941924172302454], [-0.5613221863450446, 0.21904574744272934], 
[-0.9047448110777034, 0.5962052142941056], [-0.660305610590787, -1.2603275689349236], [-0.48282455783093475, 1.3381025128911195], [1.2589901005480135, 0.16445059639417037], [1.1016925704550264, 0.450322746511732], [-2.0945821008367997, -0.9685950958533315], [0.897140852296603, -0.8221159153804152], [-0.27321588022344545, 0.12885349038670355], [1.2577636258746372, -0.3642794018613568], [0.05638064869036868, -0.1246102811627717], [1.1843620615836414, -1.5494187070139296], [-1.356718234468078, 2.2754610620206766], [-0.2917992413073571, 1.2622162265770793], [1.1785296782906352, -0.30663383411674083], [0.9283326850837434, 2.3130065680088134], [-1.0882107048064593, -1.010342411981519], [-0.6715946485167135, 0.8031765164309552], [2.3972049581252572, -0.038423697772464536], [-0.9034187647217478, -1.0045655142534609], [-1.871912369391957, 0.3003765808745209], [2.29705970483979, 1.0574278618032045], [0.02642165608389564, 0.8230750933223269], [-0.8475063459442226, 0.8805911656065049], [1.3441957002793752, -1.8507619192775213], [-0.06314204106101212, -0.5642040955544981], [1.665281833601846, -0.9680319802246914], [-0.7391735092080167, 1.5100680424220025], [0.08356107591591584, -0.04848367020368557], [0.8666160666282376, 0.4311625565426353], [-1.538567278206092, 0.9378135775410609], [0.4095382493975098, 0.3431675009738997], [-0.050431118863949055, 0.8628520416740327], [-0.2510605902077834, 1.6774164964986138], [-0.4331362969234235, 1.8272992296429715], [2.1180545057613647, -0.5605860224924134], [0.1079104515122362, -0.7697690875526993], [-0.8750681755391453, 0.7824951909000751], [-0.15812913283575247, 0.40785001219304545], [0.8862946777054688, -0.6661148735360755], [-0.7339578532439517, -1.3675264190202312], [-0.47062523549800667, 0.3094453967540856], [-0.955726907996163, -0.32730031708821217], [1.4926550729281864, 0.8072154513475598], [-0.7372092950699108, -1.6613942078027186], [1.0188761047747272, -0.6844924752229372], [-0.05791491925724839, -1.017204445959148], 
[0.4081121215852674, -0.6785753459510311], [-2.4748861670570217, -0.05414664600052668], [0.2301235309450819, -1.2081311934083587], [0.6883473058430332, -0.9261280758796094], [-0.14960251009603295, -1.6742203641582563], [-0.16965935032080587, 1.3700029197509533], [0.37404437057510803, 2.905441664235825], [-1.1671044922309914, 0.24923923691203828]]'''
'''[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'''
'''[0, 0]'''


# 贝叶斯分析（朴素）-伯努利
@bigData_api.route("/predict/bayes/bernoulli",methods=["POST"])
def bayesAnalysisBernoulli():
    """Bernoulli naive Bayes classifier exposed as a POST endpoint.

    Form fields (each a JSON-encoded array):
      X      - training samples with binary features, one row per sample
      y      - training labels, one per sample
      x_test - a single binary sample to classify

    Returns a dict with the class priors, the per-class per-feature
    P(feature == 1 | class) estimates, and the predicted label.
    """
    X = np.array(json.loads(request.form.get("X")))
    y = np.array(json.loads(request.form.get("y")))
    x_test = np.array(json.loads(request.form.get("x_test")))

    classes = np.unique(y)
    n_features = X.shape[1]

    # Class priors: relative frequency of each label in y.
    prior = {c: np.sum(y == c) / len(y) for c in classes}

    # P(feature == 1 | class) for every class/feature pair (no smoothing).
    posterior = {}
    for c in classes:
        X_c = X[y == c]
        posterior[c] = {i: np.sum(X_c[:, i] == 1) / len(X_c)
                        for i in range(n_features)}

    # Unnormalised posterior of x_test under each class.
    pred_prob = {}
    for c in classes:
        likelihood = prior[c]
        for i in range(n_features):
            prob = posterior[c][i]
            likelihood *= prob if x_test[i] == 1 else 1 - prob
        pred_prob[c] = likelihood

    # Predict the class with the largest unnormalised posterior.
    pred_label = max(pred_prob, key=pred_prob.get)

    return {"prior":prior, "posterior":posterior, "pred_label":pred_label}
'''[[1.177138172473398, -0.565274337896019], [-0.431217529018185, 0.49207111754945515], [1.3377705586815947, -0.6422998672474199], [-0.8630787085337212, 1.123904524456276], [-0.4837080922258424, 0.13332763462552952], [0.33066363279972094, 0.35751528826598417], [-1.274835472518806, 1.107618689302429], [-0.2646564369919126, 0.9708991543590427], [0.4767357741634244, 0.9593450980596305], [-1.1223766493021403, -0.14419242581506334], [0.1789184443847115, 1.3565052762748415], [-0.248970466437203, 1.0791875471427697], [-0.8585242692731215, -0.8458697977137738], [0.7216682225651283, 0.253395110730819], [2.994121467079173, 1.5284165226606918], [-0.20094418919038287, -1.2252988684981319], [-0.28865852497588346, -1.2771253357191747], [1.1292263812195422, 0.3347747862357614], [-0.2276737823111657, -0.053254502310863004], [-0.8921810049872001, -1.2724039279888986], [0.3104191005659119, -0.6912233550971733], [1.3641617180682624, -2.2958970943253374], [0.0506758667660245, 0.9301887371868292], [-0.7320154534446447, 0.10377777101519735], [-0.7610619138561863, 1.046495546291496], [0.950302521865528, -0.16379434533227424], [1.1304004647802761, 0.6239001530557766], [-0.6458522848361581, -0.20142170419356797], [1.2057111113724466, -0.3654456424726306], [-0.599264986085628, -0.28910846786678746], [-0.40071640317663254, 1.4940477505092689], [-0.5623124192990564, 0.15285518354040448], [0.24842977771203142, 1.972450130367633], [-0.3157209218254359, 0.09611879180974388], [0.5844169276755397, -0.43888024092900274], [-0.6826448992780586, -2.030290636987287], [-0.17657233125449454, 0.9949435561685119], [-0.6962728689775617, 0.10575042127555995], [1.581316907427774, 0.06491101539020662], [0.17613383584409495, -0.3072497296247995], [0.33981589833682607, -0.1532562844180953], [0.40234255546462716, 0.2789588680019494], [1.1300217505038088, -1.8689848108475373], [1.1885060974345545, -0.7380433625667706], [0.13759921015661095, -0.039941924172302454], [-0.5613221863450446, 0.21904574744272934], 
[-0.9047448110777034, 0.5962052142941056], [-0.660305610590787, -1.2603275689349236], [-0.48282455783093475, 1.3381025128911195], [1.2589901005480135, 0.16445059639417037], [1.1016925704550264, 0.450322746511732], [-2.0945821008367997, -0.9685950958533315], [0.897140852296603, -0.8221159153804152], [-0.27321588022344545, 0.12885349038670355], [1.2577636258746372, -0.3642794018613568], [0.05638064869036868, -0.1246102811627717], [1.1843620615836414, -1.5494187070139296], [-1.356718234468078, 2.2754610620206766], [-0.2917992413073571, 1.2622162265770793], [1.1785296782906352, -0.30663383411674083], [0.9283326850837434, 2.3130065680088134], [-1.0882107048064593, -1.010342411981519], [-0.6715946485167135, 0.8031765164309552], [2.3972049581252572, -0.038423697772464536], [-0.9034187647217478, -1.0045655142534609], [-1.871912369391957, 0.3003765808745209], [2.29705970483979, 1.0574278618032045], [0.02642165608389564, 0.8230750933223269], [-0.8475063459442226, 0.8805911656065049], [1.3441957002793752, -1.8507619192775213], [-0.06314204106101212, -0.5642040955544981], [1.665281833601846, -0.9680319802246914], [-0.7391735092080167, 1.5100680424220025], [0.08356107591591584, -0.04848367020368557], [0.8666160666282376, 0.4311625565426353], [-1.538567278206092, 0.9378135775410609], [0.4095382493975098, 0.3431675009738997], [-0.050431118863949055, 0.8628520416740327], [-0.2510605902077834, 1.6774164964986138], [-0.4331362969234235, 1.8272992296429715], [2.1180545057613647, -0.5605860224924134], [0.1079104515122362, -0.7697690875526993], [-0.8750681755391453, 0.7824951909000751], [-0.15812913283575247, 0.40785001219304545], [0.8862946777054688, -0.6661148735360755], [-0.7339578532439517, -1.3675264190202312], [-0.47062523549800667, 0.3094453967540856], [-0.955726907996163, -0.32730031708821217], [1.4926550729281864, 0.8072154513475598], [-0.7372092950699108, -1.6613942078027186], [1.0188761047747272, -0.6844924752229372], [-0.05791491925724839, -1.017204445959148], 
[0.4081121215852674, -0.6785753459510311], [-2.4748861670570217, -0.05414664600052668], [0.2301235309450819, -1.2081311934083587], [0.6883473058430332, -0.9261280758796094], [-0.14960251009603295, -1.6742203641582563], [-0.16965935032080587, 1.3700029197509533], [0.37404437057510803, 2.905441664235825], [-1.1671044922309914, 0.24923923691203828]]'''
'''[1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]'''
'''[0, 0]'''


@bigData_api.route("/predict/bayes/categorical",methods=["POST"])
def bayesAnalysisQuasi():
    """
    Categorical naive Bayes classifier with a multinomial likelihood.

    Form parameters (JSON encoded):
        X: 2-D array of training samples, one row per sample, one column per feature.
        y: 1-D array of training labels.
        x_test: 1-D array, the single sample to classify.

    Returns a dict with:
        prior: {class: prior probability P(c)}
        posterior: {class: {feature index: smoothed P(feature | c)}}
        pred_label: predicted class for x_test
    """
    X = np.array(json.loads(request.form.get("X")))
    y = np.array(json.loads(request.form.get("y")))
    x_test = np.array(json.loads(request.form.get("x_test")))

    # Use native Python scalars as dict keys/values so the response dict
    # serializes cleanly to JSON (numpy scalar keys break Flask's encoder).
    classes = np.unique(y).tolist()  # all distinct class labels
    n_features = X.shape[1]          # number of feature columns
    prior = {}                       # prior probabilities
    posterior = {}                   # per-class, per-feature likelihoods

    # Prior P(c) = fraction of training samples labelled c.
    for c in classes:
        prior[c] = float(np.sum(y == c)) / len(y)

    # Multinomial likelihoods with Laplace (add-one) smoothing:
    # P(i | c) = (count of feature i within class c + 1) / (total count in c + n_features)
    for c in classes:
        X_c = X[y == c]
        posterior[c] = {}
        total = np.sum(X_c)  # hoisted: was recomputed for every feature
        for i in range(n_features):
            freq = np.sum(X_c[:, i])
            posterior[c][i] = float(freq + 1) / (total + n_features)

    # Unnormalized posterior: P(c) * prod_i P(i | c) ** x_test[i]
    pred_prob = {}
    for c in classes:
        pred_prob[c] = prior[c]
        for i in range(n_features):
            pred_prob[c] *= posterior[c][i] ** x_test[i]

    # Predicted label = argmax over classes of the unnormalized posterior.
    pred_label = max(pred_prob, key=pred_prob.get)

    return {"prior":prior, "posterior":posterior, "pred_label":pred_label}


@bigData_api.route("/predict/bayes/complement",methods=["POST"])
def bayesAnalysisSupplementary ():
    """
    Naive Bayes classifier with a Bernoulli-style likelihood.

    Form parameters (JSON encoded):
        X: 2-D array of training samples, one row per sample, one column per feature.
        y: 1-D array of training labels.
        x_test: 1-D array (expected binary), the single sample to classify.

    Returns a dict with:
        prior: {class: prior probability P(c)}
        posterior: {class: {feature index: smoothed P(feature=1 | c)}}
        pred_label: predicted class for x_test
    """
    X = np.array(json.loads(request.form.get("X")))
    y = np.array(json.loads(request.form.get("y")))
    x_test = np.array(json.loads(request.form.get("x_test")))

    # Use native Python scalars as dict keys/values so the response dict
    # serializes cleanly to JSON (numpy scalar keys break Flask's encoder).
    classes = np.unique(y).tolist()  # all distinct class labels
    n_features = X.shape[1]          # number of feature columns
    prior = {}                       # prior probabilities
    posterior = {}                   # per-class, per-feature likelihoods

    # Prior P(c) = fraction of training samples labelled c.
    for c in classes:
        prior[c] = float(np.sum(y == c)) / len(y)

    # Smoothed likelihood of each feature per class:
    # P(i | c) = (count of feature i within class c + 1) / (total count in c + 2)
    for c in classes:
        X_c = X[y == c]
        posterior[c] = {}
        total = np.sum(X_c)  # hoisted: was recomputed for every feature
        for i in range(n_features):
            freq = np.sum(X_c[:, i])
            posterior[c][i] = float(freq + 1) / (total + 2)

    # Unnormalized posterior: multiply P(i|c) when the test feature is set,
    # its complement 1 - P(i|c) when it is zero.
    pred_prob = {}
    for c in classes:
        pred_prob[c] = prior[c]
        for i in range(n_features):
            if x_test[i] == 0:
                prob = 1 - posterior[c][i]
            else:
                prob = posterior[c][i]
            pred_prob[c] *= prob

    # Predicted label = argmax over classes of the unnormalized posterior.
    pred_label = max(pred_prob, key=pred_prob.get)

    return {"prior":prior, "posterior":posterior, "pred_label":pred_label}


@bigData_api.route("/predict/relation/apriori",methods=["POST"])
def association_analysis():
    """
    Mine association rules from transaction data with Apriori.

    Form parameters:
        transactions: Python-literal list of item lists, e.g. [['a','b'], ['b','c']]
        min_support: minimum support threshold for frequent itemsets
        min_confidence: minimum confidence threshold for rules

    Returns:
        pandas split-orient JSON of columns
        antecedents / consequents / support / confidence / lift.
    """
    transactions = ast.literal_eval(request.form.get("transactions"))
    min_support = float(request.form.get("min_support"))
    min_confidence=float(request.form.get("min_confidence"))
    # One-hot encode the transactions into a boolean matrix.
    te = TransactionEncoder()
    te_ary = te.fit_transform(transactions)
    df = pd.DataFrame(te_ary, columns=te.columns_)
    # Frequent itemsets above the support threshold.
    frequent_itemsets = apriori(df, min_support=min_support, use_colnames=True)

    # Association rules filtered by confidence.
    rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=min_confidence)
    result = rules[['antecedents', 'consequents', 'support', 'confidence', 'lift']].copy()
    # mlxtend returns frozenset cells, which DataFrame.to_json cannot
    # serialize; convert to sorted lists so the response is valid JSON.
    for col in ('antecedents', 'consequents'):
        result[col] = result[col].apply(sorted)
    # Return the result as split-orient JSON, keeping non-ASCII item names.
    return result.to_json(orient = "split", force_ascii = False)
# # 测试
# transactions = [['牛奶', '面包', '水果'],
#                 ['牛奶', '面包', '尿布', '啤酒', '鸡蛋'],
#                 ['面包', '水果', '尿布', '啤酒', '鸡蛋'],
#                 ['牛奶', '水果', '尿布', '啤酒'],
#                 ['牛奶', '面包', '水果', '尿布', '啤酒', '鸡蛋']]
#
# result = association_analysis(transactions, 0.3, 0.7)
# print(result)

@bigData_api.route("/predict/decisiontree/id3",methods=["POST"])
def id3Tree():
    """
    Build an ID3 decision tree and classify a test set with it.

    Form parameters (JSON with single quotes tolerated):
        dataset: training rows; last element of each row is the class label.
        testset: rows to classify.
        labels: feature names.

    Returns a dict with the rendered tree image (base64 data URI),
    the test-set classification results, and the tree structure itself.
    """
    dataset = request.form.get("dataset").replace('\'', '\"')
    dataset = json.loads(dataset)
    testSet = json.loads(request.form.get("testset").replace('\'', '\"'))
    labels = json.loads(request.form.get("labels").replace('\'', '\"'))

    print("Ent(D):", cal_entropy(dataset))
    print(u"ID3算法的最优特征索引为:" + str(ID3_chooseBestFeatureToSplit(dataset)))
    labels_tmp = labels[:]  # copy: createTree mutates the label list
    ID3desicionTree = ID3_createTree(dataset, labels_tmp,testSet)
    print('ID3desicionTree:\n', ID3desicionTree)
    base64_img = treePlotter.ID3_Tree(ID3desicionTree)

    print("下面为测试数据集结果：")
    # Classify the test set once (the original re-ran classifytest a second
    # time just to print the same result).
    ID3_TestSet_classifyResult = classifytest(ID3desicionTree, labels, testSet)
    print('ID3_TestSet_classifyResult:\n', ID3_TestSet_classifyResult)

    return {"base64_img":base64_img,"ID3_TestSet_classifyResult":ID3_TestSet_classifyResult,"ID3desicionTree":ID3desicionTree}
# dataset [['0', '0', '0', '0', '0'], ['0', '0', '0', '1', '0'], ['0', '1', '0', '1', '1'], ['0', '1', '1', '0', '1'], ['0', '0', '0', '0', '0'], ['1', '0', '0', '0', '0'], ['1', '0', '0', '1', '0'], ['1', '1', '1', '1', '1'], ['1', '0', '1', '2', '1'], ['1', '0', '1', '2', '1'], ['2', '0', '1', '2', '1'], ['2', '0', '1', '1', '1'], ['2', '1', '0', '1', '1'], ['2', '1', '0', '2', '1'], ['2', '0', '0', '0', '0'], ['2', '0', '0', '2', '0']]
# labels ['年龄段', '有工作', '有自己的房子', '信贷情况']
# testset   [['0', '0', '0', '1', '0'], ['0', '1', '0', '1', '1'], ['1', '0', '1', '2', '1'], ['1', '0', '0', '1', '0'], ['2', '1', '0', '2', '1'], ['2', '0', '0', '0', '0'], ['2', '0', '0', '2', '0']]

@bigData_api.route("/predict/decisiontree/C45",methods=["POST"])
def c45Tree():
    """
    Build a C4.5 decision tree and classify a test set with it.

    Form parameters (JSON with single quotes tolerated):
        dataset: training rows; last element of each row is the class label.
        labels: feature names.
        testset: rows to classify.

    Returns a dict with the rendered tree image (base64 data URI),
    the test-set classification results, and the tree structure itself.
    """
    form = request.form
    dataset = json.loads(form.get("dataset").replace('\'', '\"'))
    labels = json.loads(form.get("labels").replace('\'', '\"'))
    testSet = json.loads(form.get("testset").replace('\'', '\"'))

    print("Ent(D):", cal_entropy(dataset))
    print(u"C4.5算法的最优特征索引为:" + str(C45_chooseBestFeatureToSplit(dataset)))
    # createTree consumes its label list, so hand it a copy.
    labels_copy = list(labels)
    C45desicionTree = C45_createTree(dataset, labels_copy, testSet)
    print('C45desicionTree:\n', C45desicionTree)
    base64_img = treePlotter.C45_Tree(C45desicionTree)
    print("下面为测试数据集结果：")
    classify_result = classifytest(C45desicionTree, labels, testSet)
    return {
        "base64_img": base64_img,
        "C4.5_TestSet_classifyResult": classify_result,
        "C45desicionTree": C45desicionTree,
    }


@bigData_api.route("/predict/decisiontree/car",methods=["POST"])
def cardTree():
    """
    Build a CART decision tree and classify a test set with it.

    Form parameters (JSON with single quotes tolerated):
        dataset: training rows; last element of each row is the class label.
        labels: feature names.
        testset: rows to classify.

    Returns a dict with the rendered tree image (base64 data URI),
    the test-set classification results, and the tree structure itself.
    """
    dataset = request.form.get("dataset").replace('\'', '\"')
    dataset = json.loads(dataset)
    labels = json.loads(request.form.get("labels").replace('\'', '\"'))
    testSet = json.loads(request.form.get("testset").replace('\'', '\"'))

    print("Ent(D):", cal_entropy(dataset))
    print(u"CART算法的最优特征索引为:" + str(CART_chooseBestFeatureToSplit(dataset)))
    labels_tmp = labels[:]  # copy: createTree mutates the label list
    CARTdesicionTree = CART_createTree(dataset, labels_tmp,testSet)
    print('CARTdesicionTree:\n', CARTdesicionTree)
    base64_img = treePlotter.CART_Tree(CARTdesicionTree)

    print("下面为测试数据集结果：")
    CART_TestSet_classifyResult=classifytest(CARTdesicionTree, labels, testSet)
    return {
        "base64_img": base64_img,
        "CART_TestSet_classifyResult": CART_TestSet_classifyResult,
        # Correctly named key; the original returned the CART tree under the
        # copy-pasted key "ID3desicionTree", kept below for backward compatibility.
        "CARTdesicionTree": CARTdesicionTree,
        "ID3desicionTree": CARTdesicionTree,
    }


@bigData_api.route("/predict/cluster/calCluster",methods=["POST"])
def clusteringAnalysis():
    """
    Run KMeans clustering on 2-D points and return labels plus a scatter plot.

    Form parameters:
        X: JSON 2-D array of points (first two columns are plotted).
        n_clusters: number of clusters.

    Returns a dict with:
        labels: cluster index per point
        base64: "data:image/png;base64,..." scatter plot of the clustering
    """
    X =np.array(json.loads(request.form.get("X")))
    n_clusters = int(request.form.get("n_clusters"))

    # Cluster the points.
    kmeans = KMeans(n_clusters=n_clusters, random_state=0)
    labels = kmeans.fit_predict(X)

    # Visualize every cluster (the original hard-coded exactly three scatter
    # calls, drawing nothing for clusters >= 3 and empty series for fewer).
    fig = plt.figure()
    base_colors = ['red', 'blue', 'green']
    for k in range(n_clusters):
        if k < len(base_colors):
            color = base_colors[k]  # preserve the original palette for <= 3 clusters
        else:
            color = plt.cm.tab10(k % 10)
        plt.scatter(X[labels == k, 0], X[labels == k, 1], s=50, color=color,
                    label='Cluster %d' % (k + 1))
    plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1], s=100, c='black', label='Centroids')
    plt.title('KMeans Clustering')
    plt.xlabel('Feature 1')
    plt.ylabel('Feature 2')
    plt.legend()

    # Render the figure into an in-memory PNG byte stream.
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    buf.seek(0)
    # Encode the bytes as a base64 data URI.
    img_base64 = "data:image/png;base64," + base64.b64encode(buf.read()).decode('utf-8')
    # Release the buffer and the figure.
    buf.close()
    plt.close(fig)
    # Return the clustering result.
    return {"labels":labels.tolist(),"base64":img_base64}
# 测试案例
# X=[[0.43607448243390645, 0.3461603331770746], [0.5486455774918573, 0.5952995904860655], [0.05093885183222324, 0.6189780646034281], [0.42298138709240374, 0.7788634712616015], [0.6420465994600655, 0.10448249964985212], [0.3851574100518662, 0.059757666875881355], [0.5988171383774621, 0.6539327080911543], [0.7355668879159971, 0.430137242252135], [0.08570119436351908, 0.9174332125188572], [0.5781193469454791, 0.6103602318371797], [0.5232464925643712, 0.49336134247847707], [0.8024944168779705, 0.5590909127598485], [0.16655574019395392, 0.926905398162483], [0.7485611802721388, 0.16286222642864168], [0.9635043592725585, 0.5509485315270205], [0.522853996757909, 0.13067973386039022], [0.9790465232866424, 0.6803989766406773], [0.4376842130471299, 0.5910474079798916], [0.798442588001777, 0.9908166974748768], [0.13063800227650868, 0.1240624638955099], [0.6620707155416806, 0.7637429992629219], [0.5970279088133822, 0.23469273096237175], [0.20623335577409752, 0.9041673149624257], [0.8818288408106731, 0.8073360372501139], [0.2811509568241639, 0.954899003654698], [0.530089159105231, 0.7709656473769029], [0.36040923493547206, 0.9599877288590439], [0.12505453191149196, 0.35535507456214166], [0.8008524769659793, 0.5051286454833988], [0.6534905729321385, 0.19082123052279398], [0.5956861889207625, 0.8679206424127349], [0.6348097373898909, 0.751809660599553], [0.9973230916772285, 0.6717986140930619], [0.6657471729676062, 0.25096921456057253], [0.47361236915393523, 0.19147913046125153], [0.43620950006919945, 0.008838238761477712], [0.9649409859951752, 0.4456810753146926], [0.2829979516948551, 0.6276777263882178], [0.9962700154405579, 0.003576695081208503], [0.45165723786118406, 0.8674153112248915], [0.6263447123675746, 0.04530109191466414], [0.7515743377704193, 0.4273896123595601], [0.063895711863589, 0.32931923469789914], [0.14480119953411397, 0.7164006512801], [0.11834503768363325, 0.04986346005104192], [0.7028139404043661, 0.241743991302533], [0.1192747995801251, 0.3273213398688636], 
[0.31949018364904325, 0.36143516696457556], [0.10462365861027356, 0.2937950908327903], [0.48039945981247767, 0.5018074537598798]]
# n_clusters = 2



@bigData_api.route("/predict/Neuralnet/train",methods=["POST"])
def neuralNetworkTrain():
    """
    Build, train, and save a Keras feed-forward network.

    Form parameters:
        samples: JSON 2-D array of training samples.
        hidden_layers: number of hidden Dense layers.
        layer_info: JSON list of unit counts, one per hidden layer.
        model_name: path/name to save the trained model under.

    Returns the string "True" if the model was saved, "False" otherwise.
    """
    samples = np.array(json.loads(request.form.get("samples")))
    hidden_layers = int(request.form.get("hidden_layers"))
    layer_info = json.loads(request.form.get("layer_info"))
    model_name = request.form.get("model_name")
    # Build the model: input layer sized to the feature count...
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=(samples.shape[1],)))
    # ...the requested hidden ReLU layers...
    for i in range(hidden_layers):
        model.add(tf.keras.layers.Dense(layer_info[i], activation='relu'))
    # ...and a single sigmoid output for binary classification.
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    # Compile for binary cross-entropy training.
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

    # Train against the first feature column as the target.
    # NOTE(review): presumably column 0 of `samples` holds the label — confirm
    # with the caller; as written the model also sees the label as an input.
    model.fit(samples, samples[:, 0], epochs=10)

    # Persist the model; report success as a plain string.
    try:
        model.save(model_name)
        return "True"
    except Exception:  # narrowed from bare except: don't swallow SystemExit/KeyboardInterrupt
        return "False"



@bigData_api.route("/predict/Neuralnet/test",methods=["POST"])
def neuralNetworkTest():
    """
    Fit an MLP regressor on the posted training data and predict the test set.

    Form parameters (JSON encoded):
        X_train: 2-D list, one feature vector per row.
        y_train: 1-D list of target values.
        X_test: 2-D list of feature vectors to predict.

    Returns:
        {"y_pred": [...]} -- one prediction per row of X_test.
    """
    form = request.form
    X_train = json.loads(form.get("X_train"))
    y_train = json.loads(form.get("y_train"))
    X_test = json.loads(form.get("X_test"))

    # One hidden layer of 10 units; lbfgs suits small data sets,
    # fixed random_state keeps the fit reproducible.
    regressor = MLPRegressor(hidden_layer_sizes=(10, ), solver='lbfgs', random_state=1)
    regressor.fit(X_train, y_train)
    predictions = regressor.predict(X_test)
    return {"y_pred": predictions.tolist()}
# 测试案例
# import numpy as np
#
# # 构造训练数据
# X_train = [[0, 0], [1, 1]]
# y_train = [0, 1]
# # 构造待预测数据
# X_test = [[2, 2], [-1, -1]]


@bigData_api.route("/predict/logicalRegression/train",methods=["POST"])
def logisticRegressionTrain():
    """
    Fit a logistic regression model and report its equation and accuracy.

    Form parameters (JSON encoded):
        X -- sample array of shape (m, n): m samples, n features
        y -- target array of shape (m,) or (m, 1)

    Returns a dict with:
        coefficients -- regression coefficients, shape (1, n)
        equation -- human-readable regression equation
        accuracy -- training-set accuracy
    """
    X = request.form.get("X")
    X = np.array(json.loads(X))
    y=request.form.get("y")
    y= np.array(json.loads(y))

    # Fit the logistic regression model.
    model = LogisticRegression()
    model.fit(X, y)

    # Build the equation for any number of features (the original indexed
    # coefficients[0][1] directly and raised IndexError unless n == 2).
    coefficients = model.coef_
    intercept = model.intercept_
    terms = " + ".join(f"{coef}*x{i + 1}" for i, coef in enumerate(coefficients[0]))
    equation = f" y = {intercept[0]} + {terms}"

    # Training-set accuracy; cast to a native float for JSON serialization.
    y_pred = model.predict(X)
    accuracy = float(np.mean(y_pred == y))
    # Return the fitted parameters and accuracy.
    return {"coefficients":coefficients.tolist(), "equation":equation, "accuracy":accuracy}
'''
[[1.764052345967664, 0.4001572083672233], [0.9787379841057392, 2.240893199201458], [1.8675579901499675, -0.977277879876411], [0.9500884175255894, -0.1513572082976979], [-0.10321885179355784, 0.41059850193837233], [0.144043571160878, 1.454273506962975], [0.7610377251469934, 0.12167501649282841], [0.44386323274542566, 0.33367432737426683], [1.4940790731576061, -0.20515826376580087], [0.31306770165090136, -0.8540957393017248], [-2.5529898158340787, 0.6536185954403606], [0.8644361988595057, -0.7421650204064419], [2.2697546239876076, -1.4543656745987648], [0.04575851730144607, -0.1871838500258336], [1.5327792143584575, 1.469358769900285], [0.1549474256969163, 0.37816251960217356], [-0.8877857476301128, -1.980796468223927], [-0.3479121493261526, 0.15634896910398005], [1.2302906807277207, 1.2023798487844113], [-0.3873268174079523, -0.30230275057533557], [-1.0485529650670926, -1.4200179371789752], [-1.7062701906250126, 1.9507753952317897], [-0.5096521817516535, -0.4380743016111864], [-1.2527953600499262, 0.7774903558319101], [-1.6138978475579515, -0.2127402802139687], [-0.8954665611936756, 0.386902497859262], [-0.510805137568873, -1.180632184122412], [-0.028182228338654868, 0.42833187053041766], [0.06651722238316789, 0.3024718977397814], [-0.6343220936809636, -0.3627411659871381], [-0.672460447775951, -0.3595531615405413], [-0.813146282044454, -1.7262826023316769], [0.17742614225375283, -0.4017809362082619], [-1.6301983469660446, 0.4627822555257742], [-0.9072983643832422, 0.05194539579613895], [0.7290905621775369, 0.12898291075741067], [1.1394006845433007, -1.2348258203536526], [0.402341641177549, -0.6848100909403132], [-0.8707971491818818, -0.5788496647644155], [-0.31155253212737266, 0.05616534222974544], [-1.1651498407833565, 0.9008264869541871], [0.46566243973045984, -1.5362436862772237], [1.4882521937955997, 1.8958891760305832], [1.1787795711596507, -0.17992483581235091], [-1.0707526215105425, 1.0544517269311366], [-0.40317694697317963, 1.2224450703824274], 
[0.2082749780768603, 0.9766390364837128], [0.3563663971744019, 0.7065731681919482], [0.010500020720820478, 1.7858704939058352], [0.12691209270361992, 0.40198936344470165], [1.8831506970562544, -1.3477590611424464], [-1.2704849984857336, 0.9693967081580112], [-1.17312340511416, 1.9436211856492926], [-0.41361898075974735, -0.7474548114407578], [1.9229420264803847, 1.4805147914344243], [1.8675589604265699, 0.9060446582753853], [-0.8612256850547025, 1.9100649530990337], [-0.2680033709513804, 0.8024563957963952], [0.947251967773748, -0.1550100930908342], [0.6140793703460803, 0.9222066715665268], [0.37642553115562943, -1.0994007905841945], [0.298238174206056, 1.3263858966870303], [-0.6945678597313655, -0.14963454032767076], [-0.43515355172163744, 1.8492637284793418], [0.6722947570124355, 0.40746183624111043], [-0.7699160744453164, 0.5392491912918173], [-0.6743326606573761, 0.03183055827435118], [-0.635846078378881, 0.6764332949464997], [0.5765908166149409, -0.20829875557799488], [0.3960067126616453, -1.0930615087305058], [-1.4912575927056055, 0.4393917012645369], [0.16667349537252904, 0.6350314368921064], [2.383144774863942, 0.9444794869904138], [-0.9128222254441586, 1.117016288095853], [-1.3159074105115212, -0.461584604814709], [-0.06824160532463124, 1.7133427216493666], [-0.7447548220484399, -0.8264385386590144], [-0.0984525244254323, -0.6634782863621074], [1.126635922106507, -1.0799315083634233], [-1.1474686524111024, -0.43782004474443403], [-0.4980324506923049, 1.9295320538169858], [0.9494208069257608, 0.0875512413851909], [-1.225435518830168, 0.8443629764015471], [-1.0002153473895647, -1.5447710967776116], [1.1880297923523018, 0.3169426119248496], [0.920858823780819, 0.3187276529430212], [0.8568306119026912, -0.6510255933001469], [-1.0342428417844647, 0.681594518281627], [-0.8034096641738411, -0.6895497777502005], [-0.45553250351734315, 0.01747915902505673], [-0.35399391125348395, -1.3749512934180188], [-0.6436184028328905, -2.2234031522244266], [0.6252314510271875, 
-1.6020576556067476], [-1.1043833394284506, 0.052165079260974405], [-0.7395629963913133, 1.5430145954067358], [-1.2928569097234486, 0.26705086934918293], [-0.0392828182274956, -1.1680934977411974], [0.5232766605317537, -0.1715463312222481], [0.7717905512136674, 0.8235041539637314], [2.16323594928069, 1.336527949436392]]
[[0], [1], [0], [1], [0], [1], [0], [1], [1], [0], [0], [1], [0], [1], [0], [1], [0], [0], [0], [0], [1], [1], [0], [1], [0], [1], [1], [0], [1], [0], [0], [0], [0], [0], [1], [1], [0], [0], [0], [0], [1], [1], [0], [0], [1], [0], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [1], [0], [0], [1], [1], [1], [1], [0], [0], [1], [0], [0], [1], [0], [1], [0], [1], [0], [0], [1], [0], [0], [0], [0], [0], [1], [0], [1], [0], [0], [1], [1], [1], [1], [0], [0], [1], [0], [1], [1], [1], [1], [0], [0]]
'''


@bigData_api.route("/predict/logicalRegression/test",methods=["POST"])
def logisticRegressionTest():
    """
    Fit a logistic regression on the posted training data and predict X_test.

    Form parameters (JSON encoded):
        X: training features (dict/list accepted by pandas.DataFrame).
        y: training labels.
        X_test: features to predict.

    Returns:
        {"y_pred": [...]} -- predicted labels for X_test.
    """
    form = request.form
    X = pd.DataFrame(json.loads(form.get("X")))
    y = pd.Series(json.loads(form.get("y")))
    X_test = pd.DataFrame(json.loads(form.get("X_test")))

    # Fit the classifier on the training data.
    classifier = LogisticRegression()
    classifier.fit(X, y)

    # Predict labels for the unseen samples.
    predictions = classifier.predict(X_test)

    return {"y_pred": predictions.tolist()}

# X_train = {'Feature 1': [1, 2, 3, 4], 'Feature 2': [0, 1, 0, 1]}
# y_train = [0, 1, 0, 1]
# X_test = {'Feature 1': [2.5, 1.5], 'Feature 2': [1, 0]}


# 信度分析
@bigData_api.route("/test/cronbach/check",methods=["POST"])
def reliabilityAnalysis():
    """
    Compute per-item reliability coefficients and a verdict.

    Form parameters:
    data: JSON 2-D array; each row is one trial, each column one measurement item.

    Returns a dict with:
    reliability: list of reliability coefficients, one per item (column)
    conclusion: Chinese-language verdict string

    NOTE(review): despite the /cronbach/ route name this is not the standard
    Cronbach's alpha formula; each item's coefficient is
    1 - (sum of item variances / n_rows) / item variance — confirm this
    ad-hoc metric is intended.
    """
    data = request.form.get("data")
    data = json.loads(data)
    data = np.array(data)

    # Variance of each measurement item (per column).
    variances = np.var(data, axis=0)

    # Sum of the item variances.
    total_variance = np.sum(variances)

    # "Error" variance: the summed variance averaged over the number of rows.
    error_variance = total_variance / len(data)

    # Reliability coefficient per item; items with variance below
    # error_variance come out negative.
    reliability = 1 - (error_variance / variances)

    # Verdict: all items >= 0.7 -> reliable; all >= 0.5 -> interpret with
    # caution; otherwise unreliable. (Strings are user-facing, in Chinese.)
    if np.all(reliability >= 0.7):
        conclusion = "信度系数较高，测量结果可靠。"
    elif np.all(reliability >= 0.5):
        conclusion = "信度系数一般，测量结果需要谨慎解释。"
    else:
        conclusion = "信度系数较低，测量结果不可靠。"

    return {"reliability":reliability.tolist(), "conclusion":conclusion}
# 测试案例
# data = np.array([
#     [1.2, 2.5, 3.1],
#     [1.4, 2.3, 3.3],
#     [1.3, 2.4, 3.2],
#     [1.1, 2.6, 3.0],
#     [1.0, 2.7, 2.9]
# ])
#


